library(ggplot2)
library(dplyr)
library(reshape2)
library(fst)

# Set up the empty descriptive-statistics table: a 6 x 6 matrix filled with NA.
# Rows are the reported variables (plus the observation count); columns are
# the full-sample mean and standard deviation followed by one column per
# candidate group.

desc_table <- matrix(
  NA,
  nrow = 6,
  ncol = 6,
  dimnames = list(
    c(
      "Women",
      "Age",
      "Non-Danish origin (%)",
      "Years of education",
      "Earnings score ($z$-score)",
      "Observations"
    ),
    c(
      "Mean",
      "Std. dev.",
      "Running for municipality",
      "Elected for municipality",
      "Running for parliament",
      "Elected for parliament"
    )
  )
)

# Load the earnings score data: restores the objects saved in main_data.rdata
# into the current workspace.
# NOTE(review): presumably this is what defines `data_comp`, which is used
# below without being created anywhere in this script -- confirm.
load("main_data.rdata")

# All election years covered by the table, in ascending order: the years used
# by the run_kv_* / elected_kv_* filters (municipal columns) combined with the
# years used by the run_fv_* / elected_fv_* filters (parliament columns).
# 2001 and 2005 occur in both sets and are kept once.
election_years <- sort(union(
  c(1993, 1997, 2001, 2005, 2009, 2013),
  c(1990, 1994, 1998, 2001, 2005, 2007, 2011, 2015)
))

# Keep only the rows of data_comp whose four_years value is an election year.
# (While fitting/debugging, a sample_frac(0.05) step can be applied to
# data_comp before this filter to work on a 5% sample.)
data_comp <- filter(data_comp, four_years %in% election_years)

# Merge on information on gender, age, ethnicity and education.
#
# For every election year, the matching rows of data_comp are left-joined, by
# PNR, with that year's bef<year>.fst and udda_recoded<year>.fst files. The
# yearly results are collected in a list and stacked once at the end, rather
# than growing a data frame inside the loop (which re-copies all previously
# bound rows on every iteration). The deprecated data_frame() seed is no
# longer needed; as_tibble() keeps the result a tibble as before.
# NOTE(review): the columns used further down (STATSB, IE_TYPE, KOEN, ALDER,
# education) presumably come from these two files -- confirm.

yearly_data <- vector("list", length(election_years))
names(yearly_data) <- as.character(election_years)

for (i in election_years) {
  bef_i <- read.fst(paste0("bef", i, ".fst"))
  edu_i <- read.fst(paste0("udda_recoded", i, ".fst"))

  year_data <-
    data_comp %>%
    filter(four_years == i) %>%
    left_join(bef_i, by = "PNR") %>%
    left_join(edu_i, by = "PNR")

  # Store under the year's name; list order follows election_years, so the
  # stacked rows appear in the same order as before.
  yearly_data[[as.character(i)]] <- year_data
  print(i) # progress indicator
}

data_comp_desc <- as_tibble(bind_rows(yearly_data))

# Restrict the sample and derive the variables reported in the table.
#   non_danish: TRUE when IE_TYPE is not 1
#   female:     TRUE when KOEN equals 2
#   edu_years:  years of education assigned from the education label; labels
#               not listed below (and missing labels) give NA
# NOTE(review): several labels contain a literal "?" where a Danish letter
# would be expected (possibly an encoding artefact). The comparison is exact,
# so confirm these strings match the values actually stored in `education`.
data_comp_desc <-
  data_comp_desc %>%
  # limit to only DK citizens (STATSB code 5100)
  filter(STATSB == 5100) %>%
  mutate(
    non_danish = IE_TYPE != 1,
    female = KOEN == 2,
    edu_years = case_when(
      education == "Grundskole" ~ 9,
      education == "Erhvervsfaglige praktik- og hovedforl?b" ~ 13,
      education == "Almengymnasiale uddannelser" ~ 12,
      education == "Erhvervsgymnasiale uddannelser" ~ 12,
      education == "Korte videreg?ende uddannelser" ~ 14,
      education == "Mellemlange videreg?ende uddannelser" ~ 15,
      education == "Bachelor" ~ 15,
      education == "Lange videreg?ende uddannelser" ~ 17,
      education == "Forskeruddannelser" ~ 20,
      TRUE ~ NA_real_
    )
  )

 
# Full-sample statistics: fill the "Mean" and "Std. dev." columns, plus the
# number of observations (stored in the "Mean" column of the last row).
# Cells are addressed by row/column name rather than by position.
# Note: the earnings score (inc_res) is summarised without na.rm, unlike the
# other variables, so a single missing score makes its mean and sd NA.
desc_table["Women", "Mean"] <-
  mean(data_comp_desc$female, na.rm = TRUE)
desc_table["Women", "Std. dev."] <-
  sd(data_comp_desc$female, na.rm = TRUE)

desc_table["Age", "Mean"] <-
  mean(data_comp_desc$ALDER, na.rm = TRUE)
desc_table["Age", "Std. dev."] <-
  sd(data_comp_desc$ALDER, na.rm = TRUE)

desc_table["Non-Danish origin (%)", "Mean"] <-
  mean(data_comp_desc$non_danish, na.rm = TRUE)
desc_table["Non-Danish origin (%)", "Std. dev."] <-
  sd(data_comp_desc$non_danish, na.rm = TRUE)

desc_table["Years of education", "Mean"] <-
  mean(data_comp_desc$edu_years, na.rm = TRUE)
desc_table["Years of education", "Std. dev."] <-
  sd(data_comp_desc$edu_years, na.rm = TRUE)

desc_table["Earnings score ($z$-score)", "Mean"] <-
  mean(data_comp_desc$inc_res)
desc_table["Earnings score ($z$-score)", "Std. dev."] <-
  sd(data_comp_desc$inc_res)

desc_table["Observations", "Mean"] <- nrow(data_comp_desc)


# Rows for the "Running for municipality" column: a row is kept when its
# four_years value is one of the years below and the run_kv_<year> indicator
# for that same year is set.
data_local_run <- filter(
  data_comp_desc,
  (four_years == 1993 & run_kv_1993) |
    (four_years == 1997 & run_kv_1997) |
    (four_years == 2001 & run_kv_2001) |
    (four_years == 2005 & run_kv_2005) |
    (four_years == 2009 & run_kv_2009) |
    (four_years == 2013 & run_kv_2013)
)



# Rows for the "Elected for municipality" column: a row is kept when its
# four_years value is one of the years below and the elected_kv_<year>
# indicator for that same year is set.
data_local_elected <- filter(
  data_comp_desc,
  (four_years == 1993 & elected_kv_1993) |
    (four_years == 1997 & elected_kv_1997) |
    (four_years == 2001 & elected_kv_2001) |
    (four_years == 2005 & elected_kv_2005) |
    (four_years == 2009 & elected_kv_2009) |
    (four_years == 2013 & elected_kv_2013)
)

# Rows for the "Running for parliament" column: a row is kept when its
# four_years value is one of the years below and the run_fv_<year>_FV
# indicator for that same year is set.
data_mp_run <- filter(
  data_comp_desc,
  (four_years == 1990 & run_fv_1990_FV) |
    (four_years == 1994 & run_fv_1994_FV) |
    (four_years == 1998 & run_fv_1998_FV) |
    (four_years == 2001 & run_fv_2001_FV) |
    (four_years == 2005 & run_fv_2005_FV) |
    (four_years == 2007 & run_fv_2007_FV) |
    (four_years == 2011 & run_fv_2011_FV) |
    (four_years == 2015 & run_fv_2015_FV)
)

# Rows for the "Elected for parliament" column: a row is kept when its
# four_years value is one of the years below and the elected_fv_<year>
# indicator for that same year is set.
data_mp_elected <- filter(
  data_comp_desc,
  (four_years == 1990 & elected_fv_1990) |
    (four_years == 1994 & elected_fv_1994) |
    (four_years == 1998 & elected_fv_1998) |
    (four_years == 2001 & elected_fv_2001) |
    (four_years == 2005 & elected_fv_2005) |
    (four_years == 2007 & elected_fv_2007) |
    (four_years == 2011 & elected_fv_2011) |
    (four_years == 2015 & elected_fv_2015)
)

# Build one candidate-group column of desc_table, in the table's row order:
# Women, Age, Non-Danish origin, Years of education, Earnings score,
# Observations. The earnings score mean is taken without na.rm, so any missing
# inc_res value in the group yields NA for that cell.
desc_group_column <- function(group_data) {
  c(
    mean(group_data$female, na.rm = TRUE),
    mean(group_data$ALDER, na.rm = TRUE),
    mean(group_data$non_danish, na.rm = TRUE),
    mean(group_data$edu_years, na.rm = TRUE),
    mean(group_data$inc_res),
    nrow(group_data)
  )
}

# Group means and observation counts for the four candidate groups.
desc_table[, "Running for municipality"] <- desc_group_column(data_local_run)
desc_table[, "Elected for municipality"] <- desc_group_column(data_local_elected)
desc_table[, "Running for parliament"] <- desc_group_column(data_mp_run)
desc_table[, "Elected for parliament"] <- desc_group_column(data_mp_elected)

# Round every statistic to two decimals; the observation counts in the last
# row are left untouched.
# NOTE(review): the "Non-Danish origin (%)" row holds a proportion (mean of a
# logical), not a percentage -- confirm the label/scale is intended.
desc_table[1:5, ] <- round(desc_table[1:5, ], 2)

